# Replication script for
#
# "Political Divisions in Large Cities:
# The Socio-Spatial Basis of Legislative Behavior in Chicago and Toronto"
# Journal of Politics
#
# TORONTO ANALYSIS
#
# November 2, 2023
#
# Zack Taylor, University of Western Ontario, zack.taylor@uwo.ca
# David A. Armstrong II, University of Western Ontario, dave.armstrong@uwo.ca

library(tibble)
library(dplyr)  
library(purrr)
library(ggplot2)
library(tidyr)
library(stringr)
library(corrr)
library(rio)
library(readstata13)
library(cowplot)
library(scales)
library(progress)
library(emIRT)
library(sf)
library(sp)
library(spdep)
library(ggrepel)
library(ggridges)
library(ggpubr)
library(pscl)
library(wnominate)
library(dwnominate)

# legR requires prior installation of the h2o library
# https://docs.h2o.ai/h2o/latest-stable/h2o-docs/downloading.html
library(h2o)

# the legR package must be downloaded from GitHub
library(remotes)
remotes::install_github(repo = "davidaarmstrong/legr",
                        upgrade = "always")
library(legR)

# SET UP CONSOLE LOG FILE ----

toronto_log <- file("toronto_log.txt")
sink(toronto_log, append = TRUE, type = "output") # Writing console output to log file
sink(toronto_log, append = TRUE, type = "message")
cat(readChar(rstudioapi::getSourceEditorContext()$path, # Writing currently opened R script to file
             file.info(rstudioapi::getSourceEditorContext()$path)$size))

# GENERAL ----

# set seed
#seed = 5282 # maximized the sum of |correlation| 
seed <- 3094 # maximizes the number of entries in the Pooled column on D2
set.seed(seed)
sims = 100 # Set number of simulations for Appendix E
seeds <- round(runif(sims*2, 1, 10000)) # Set seeds for Appendix E

# plot theme for Appendix D
theme_map <- function(base_size = 12) {
  theme_grey(base_size) %+replace%
    theme(
      panel.background = element_rect(fill = "transparent",colour = NA),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      plot.background = element_rect(fill = "transparent",colour = NA),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      plot.margin = unit(c(0,0,0,0), unit = "pt", "lines"),
      legend.background = element_rect(fill = "transparent", color = NA),
      legend.key = element_rect(fill = "black", color = NA),
      legend.text = element_text(color = "black"),
      legend.title = element_text(color = "black"),
      legend.box = "vertical",
      legend.position = "right",
      complete = TRUE
    )}

# transpose function
trans <- function(m){
  as_tibble(t(m)) %>% 
    mutate(delta = colnames(m)) %>% 
    separate(delta, into=c("start", "end")) %>% 
    mutate(across(c("start", "end"), as.numeric))
}

# create figures object
figures <- NULL

# TORONTO ----

city = "Toronto"

load("source/tor_rollcalls.rda")

tor_hist <- read.dta13("source/tor_ward_councilors.dta")

m <- colMeans(X, na.rm=TRUE)
unan <- tibble(
  m = m, 
  term = final_vote$term
)
unan <- unan %>% 
  mutate(m = ifelse(m < .5, 1-m, m)) %>% 
  filter(m < .95)

uterms <- c("1998-2000 Lastman","2000-03 Lastman","2003-06 Miller","2006-10 Miller", "2010-14 Ford", "2014-18 Tory", "2018-22 Tory")
names(uterms) <- c(1,2,3,4,5,6,7)

## Calculate number of cells, votes, structural missings
n_votes <- sum(!is.na(c(X)))
tor_trm <- final_vote$term
s <- split(as.data.frame(t(X)), tor_trm)
struct_miss <- sapply(s, \(d)sum(apply(d, 2, \(x)if(all(is.na(x))){length(x)}else{0})))
struct_miss <- sum(struct_miss)
idio_miss <- sum(is.na(c(X))) - struct_miss
l <- lapply(s, \(d)apply(d, 2, \(x)c(n_miss = sum(is.na(x)), n_tot = length(x))))
allmiss <- sapply(l, \(x)x[1,])
all_v <- sapply(l, \(x)x[2,])
idio_miss_v <- sapply(1:ncol(allmiss), \(i){
  ifelse(allmiss[,i] == all_v[,i], 0, allmiss[,i])
})
idio_miss_pct <- idio_miss_v/all_v
idio_miss_pct <- apply(idio_miss_pct, 1, \(x)mean(x[which(x > 0)]))
sprintf("Number of Votes:  %.0f", n_votes)
sprintf("Number of Cells: %.0f", prod(dim(X)))
sprintf("Number of Observed Votes: %0.f", sum(!is.na(c(X))))
sprintf("Percentage of Missed Votes: %.2f%%", mean(idio_miss_pct)*100)

# Calculate proportion in modal category
tabs <- apply(X, 2, function(x)table(factor(x, levels=c(0,1))))
mcvotes <- sum(apply(tabs, 2, max))
sprintf("Number in the Modal Category %.0f", mcvotes)
sprintf("Proportion in the Modal Category: %.2f", mcvotes/n_votes)

## Calculate Agreement Scores

a <- function(x,y){
  tmp <- rcm[c(x,y), ]
  tmp <- t(tmp)
  tmp <- na.omit(tmp)
  sum(tmp[,1] == tmp[,2])/nrow(tmp)
}
rcm <- X

o <- outer(1:nrow(rcm), 1:nrow(rcm), Vectorize(a))
a <- o[upper.tri(o)]
a <- ifelse(a < .5, 1-a, a)
tor_agree <- a

sprintf("Median Agreement Score: %.0f%%", median(na.omit(tor_agree))*100)

## force orthogonality
orthogonal <- TRUE

## use legR to generate data and priors


out <- legR(
  X, 
  terms=final_vote$term, 
  #    terms=tmp_trm, 
  legis_data=h_all, 
  minprop=.2, 
  nRounds = 1,
  othermax=.35, 
  minperterm=10, 
  k=2, 
  seed = seed, 
  ndim=2, 
  method="glrm", 
  est_model = FALSE
)

## set polarity anchors on priors
priors <- out$priors
priors[[1]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[1]]$x.mu0 ))
priors[[2]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[2]]$x.mu0 ))
priors[[1]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[1]]$x.sigma0 ))
priors[[2]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[2]]$x.sigma0 ))
priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "D.Holyday"),1] <- 1
priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Mihevc"),1] <- -1
priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "McConnell"),1] <- 1
priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Ootes"),1] <- -1
priors[[1]]$x.sigma0[which(out$dats[[1]]$id$name %in% c("D.Holyday", "Mihevc"))] <- .1
priors[[2]]$x.sigma0[which(out$dats[[2]]$id$name %in% c("Ootes", "McConnell"))] <- .1

## estimate model with priors set
out <- legR(
  X, 
  terms=final_vote$term, 
  #    terms=tmp_trm, 
  legis_data=h_all, 
  priors = priors, 
  minprop=.2, 
  nRounds = 1,
  othermax=.35, 
  minperterm=10, 
  k=2, 
  seed = seed, 
  ndim=2, 
  method="glrm", 
  max_mem_size="8g", 
  est_model = TRUE
)

tor_legr_out <- out
save(tor_legr_out, file="temp/tor_legr_out.rda")

## Collect results, ensure consistent polarity across time and orthogonalize
x <- gather_data(out)
x <- x %>% dplyr::filter(!((Dim1 == 0 | is.na(Dim1)) & (Dim2 == 0 | is.na(Dim2))))
x <- x %>% dplyr::arrange(session, name)
{if(orthogonal){
  flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
  x <- dplyr::left_join(x, flipx %>% mutate(session = as.numeric(session)))
  ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
  x <- x %>% dplyr::mutate(Dim1 = -Dim1*flip1,
                    Dim2 = Dim2*flip2, 
                    Dim1 = -Dim1) %>% 
    dplyr::select(-contains("flip")) 
  xs <- x %>% dplyr::group_split(session) %>% purrr::map(function(x){
    o <- orthogonalize(x)
    o <- o %>% dplyr::select(-Dim1, -Dim2) %>% dplyr::rename(Dim1=Dim1_gs, Dim2=Dim2_gs)
    o})
  x <- bind_rows(xs)
  }else{
  flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
  x <- left_join(x, flipx %>% mutate(session = as.numeric(session)))
  ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
  x <- x %>% mutate(Dim1 = -Dim1*flip1,
                    Dim2 = Dim2*flip2, 
                    Dim1 = -Dim1, 
                    Dim2 = ifelse(session == 2, -Dim2, Dim2)) %>% 
    select(-contains("flip")) %>% 
    mutate(Dim2 = ifelse(session %in% c(1,3), -Dim2, Dim2)) 
}
}

# used in Appendix E
tor_baseline <- x
save(tor_baseline, file="temp/tor_baseline.rda")

## Figure 1: Toronto PRE bar plot ----

trm <- final_vote$term
pb <- progress_bar$new(total = ncol(X))
res <- NULL
for(i in 1:ncol(X)){
  pb$tick()
  tmp.trm <- trm[i]
  dat <- tibble(vote = X[,i], name = h_all$name)
  lat <- x %>% filter(session == tmp.trm)
  dat <- inner_join(dat, lat, by="name") %>% na.omit()
  if(sum(dat$vote == 1, na.rm=TRUE) > 0 & sum(dat$vote == 0, na.rm=TRUE) > 0){
    m1 <- suppressWarnings(glm(vote ~ Dim1, data=dat, family=binomial))
    m12 <- suppressWarnings(glm(vote ~ Dim1 + Dim2, data=dat, family=binomial))
    ce1 <- sum(m1$y != round(m1$fitted))
    ce12 <- sum(m12$y != round(m12$fitted))
    m2 <- suppressWarnings(glm(vote ~ Dim2, data=dat, family=binomial))
    ce2 <- sum(m2$y != round(m2$fitted))
    minvotes <- min(table(na.omit(dat$vote)))
    nvotes <- sum(!is.na(dat$vote))
    res <- rbind(res, data.frame(session=tmp.trm, 
                                 minvote = minvotes, 
                                 ce1 = ce1, 
                                 ce2 = ce2, 
                                 ce12 = ce12, 
                                 n=nvotes))
  }
}
legr_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1 = sum(minvote - ce1)/sum(minvote), 
            apre2 = sum(minvote - ce2)/sum(minvote), 
            apre12 = sum(minvote - ce12)/sum(minvote)) %>% 
  mutate(apre_diff = apre12-apre1)

legr_apre <- res %>% 
  summarise(apre1 = sum(minvote - ce1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2 = sum(minvote - ce2, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12 = sum(minvote - ce12, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff = apre12-apre1)

tor_apre <- bind_rows(legr_apre %>% mutate(session = 0) %>% select(session, everything()), legr_apre_by_term) %>% 
  mutate(city = "Toronto", 
         session = factor(session, 
                          levels=0:7, 
                          labels= c("Overall", "1998-2000 Lastman", "2000-2003 Lastman", 
                                    "2003-2006 Miller", "2006-2010 Miller", 
                                    "2010-2014 Ford", "2014-2018 Tory", "2018-2022 Tory"))) %>% 
  mutate(apre12 = apre12-apre1) %>% 
  pivot_longer(c("apre1", "apre12"), names_to="dimension", values_to="apre") %>% 
  mutate(dimension = factor(dimension, 
                            levels=c("apre12", "apre1"), 
                            labels=c("Dimension 1+2", "Dimension 1")))

sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12))/sum(res$n))*100)

save(tor_apre, file="temp/tor_apre.rda")

cat("Percentage of PRE\n")
legr_apre_by_term %>% 
  mutate(pct1 = round(apre1/apre12*100), 
         pct2 = round((apre12-apre1)/apre12*100), 
         session = factor(session, labels=uterms)) %>% # changed labels to uterms from terms
  select(session, pct1, pct2) %>% 
  as.data.frame()

figures[['fig1tor_apre']] <- tor_apre %>% 
  ggplot(aes(y=apre, fill=dimension)) + 
  geom_bar(aes(x=rep(c(-.5, 1:7), each=2)), stat="identity")  + 
  theme_classic() + 
  labs(x="", y="APRE", fill="") + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.25), 
        text = element_text(size = 8),
        legend.position=c(.275, 1), 
        legend.background = element_blank(), 
        legend.key = element_rect(fill = "transparent", colour = "transparent")) + 
  scale_fill_manual(values = c("grey70", "grey30")) +
  scale_y_continuous(labels=label_percent(accuracy=1)) + 
  scale_x_continuous(breaks = c(-.5, 1:7), labels=levels(tor_apre$session)) + 
  guides(fill=guide_legend(nrow=1,byrow=TRUE))


## Figure 2: Toronto Movements  ----
set.seed(seed)
### Analysis of changing councilor positions
boot1 <- boot_emIRT(out$mods[[1]], 
                    .data=out$dats[[1]]$dat, 
                    .starts= out$starts[[1]], 
                    .priors= out$priors[[1]], 
                    Ntrials = 100, 
                    .control=list(threads = 4, verbose = TRUE, thresh = 1e-4, maxit=200, checkfreq=1))
boot2 <- boot_emIRT(out$mods[[2]], 
                    .data=out$dats[[2]]$dat, 
                    .starts= out$starts[[2]], 
                    .priors= out$priors[[2]], 
                    Ntrials = 100, 
                    .control=list(threads = 4, verbose = TRUE, thresh = 1e-4, maxit=200, checkfreq=1))

x1w <- x %>% 
  select(name, session, Dim1) %>%
  mutate(session = paste0("term_", session)) %>% 
  pivot_wider(names_from = "session", values_from = "Dim1", values_fill=NA)

x2w <- x %>% 
  select(name, session, Dim2) %>%
  mutate(session = paste0("term_", session)) %>% 
  pivot_wider(names_from = "session", values_from = "Dim2", values_fill=NA)

b1s <- boot1$bse$x
b2s <- boot2$bse$x

x1w <- x1w[match(h_all$name, x1w$name), ]

x1l <- x1w %>% 
  pivot_longer(-name, names_pattern="term_(.*)", names_to="term", values_to = "vals")
x1l <- x1l %>% mutate(sd = c(t(b1s)))
x1l <- x1l %>% mutate(term = as.numeric(term))
x1l <- na.omit(x1l)
x1l <- x1l %>% 
  group_by(name) %>% 
  filter(n() >= 3)

draws <- replicate(1500, rnorm(nrow(x1l), x1l$vals, x1l$sd))
colnames(draws) <- paste0("drw", 1:1500)
x1l <- cbind(x1l, draws)
x1l2 <- x1l %>% 
  pivot_longer(starts_with('drw'), names_to = "draw", values_to = "sims")
x1l2 <- x1l2 %>% 
  group_by(name, draw) %>% 
  transmute(term = term, 
          sim_diff = sims - lag(sims), 
          val_diff = vals - lag(vals))

d1move <- x1l2 %>% group_by(name, term, val_diff) %>% 
  summarise(p = mean(sim_diff > 0)) %>% 
  mutate(p = ifelse(p < .5, 1-p, p)) %>% 
  na.omit()


x2w <- x2w[match(h_all$name, x2w$name), ]

x2l <- x2w %>% 
  pivot_longer(-name, names_pattern="term_(.*)", names_to="term", values_to = "vals")
x2l <- x2l %>% mutate(sd = c(t(b2s)))
x2l <- x2l %>% mutate(term = as.numeric(term))
x2l <- na.omit(x2l)
x2l <- x2l %>% 
  group_by(name) %>% 
  filter(n() >= 3)

draws <- replicate(1500, rnorm(nrow(x2l), x2l$vals, x2l$sd))
colnames(draws) <- paste0("drw", 1:1500)
x2l <- cbind(x2l, draws)
x2l2 <- x2l %>% 
  pivot_longer(starts_with('drw'), names_to = "draw", values_to = "sims")
x2l2 <- x2l2 %>% 
  group_by(name, draw) %>% 
  transmute(term = term, 
            sim_diff = sims - lag(sims), 
            val_diff = vals - lag(vals))

d2move <- x2l2 %>% group_by(name, term, val_diff) %>% 
  summarise(p = mean(sim_diff > 0)) %>% 
  mutate(p = ifelse(p < .5, 1-p, p)) %>% 
  na.omit()

df2 <- bind_rows(
  d1move %>% mutate(f = "Dimension 1"), 
  d2move %>% mutate(f = "Dimension 2"))



df_ag <- df2 %>% 
  mutate(cutd = cut(abs(val_diff), breaks=seq(0, 3.5, by=.25)), 
         sig = ifelse(p > .9, "Yes", "No")) %>% 
  group_by(f, cutd, sig) %>% 
  tally %>% 
  ungroup %>% 
  group_by(f) %>% 
  mutate(pct = n/sum(n), 
         x=as.numeric(cutd)*.25-.125)


figures[['fig2tor_movement']] <- ggplot(df_ag, aes(x=x, y=pct, fill=sig)) + 
  geom_bar(width=.25, alpha=.7, stat="identity", position="identity") + 
  facet_wrap(~f) + 
  theme_bw() +
  theme(legend.position = c(.95, .95 ),
        legend.justification = c("right", "top"),
        legend.key.size = unit(.125, "in"),
        axis.ticks = element_line(colour = 'black', size = 0.2),
        text = element_text(size = 8),
        strip.background = element_blank(),
        panel.grid=element_blank()) + 
  scale_fill_manual(values = c("black", "grey80")) +
  scale_y_continuous(labels = scales::label_percent()) + 
  labs(x="Difference (t - [t-1])", 
       y="Percentage of Changes", 
       fill="Credible\nChange")

## Figures 5a + 5b: Toronto Correlation Tables ----
terms <- c("Pooled","1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")

data <- read.dta13("source/tor_ward_data.dta") %>%
  mutate(session = term, 
         termlabel = terms[-1][session]) %>% 
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | term | termlabel | session | wardnum) %>% 
  filter(wardnum != 0)
tor_hist <- read.dta13("source/tor_ward_councilors.dta")

data <- left_join(tor_hist %>% select(wardnum, session, name), data)

data <- data %>% 
  left_join(x %>% 
              mutate(Dim2 = ifelse(session %in% 6:7, -Dim2, Dim2)) %>% 
              rename(legR_D1 = Dim1, legR_D2 = Dim2)) %>% 
  select(-starts_with("Dim"), -session, -name, -wardnum, -term)

rownames(data) <- NULL

varname_lookup <- rio::import("source/varname_lookup.xlsx")
varname_lookup <- bind_rows(varname_lookup, tibble(variable = "v_lfosci", varname = "% Science/Tech Jobs", vartype="status"))

data <- data %>% 
  mutate(legR_D2 = ifelse(termlabel %in% c("2014-2018", "2018-2022"), -legR_D2, legR_D2 ))

byterm <- split(data, f = data$termlabel) 



byterm[["Pooled"]] <- data  # add whole dataset as "pooled" item  

terms <- c("Pooled","1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")

cormatrix <- NULL
cormatrix_d1 <- NULL
cormatrix_d2 <- NULL

for(t in terms){
  
  byterm[[t]] <- byterm[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  # if(t %in% c("2010-2014", "2014-2018")){
  #   byterm[[t]]$legR_D2 <- -byterm[[t]]$legR_D2
  # }
  cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table
  
  cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
    focus(legR_D1) %>%
    rename(!!paste0(t) := legR_D1)        # rename coefficient variable to termlabela
  cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
    focus(legR_D2) %>%
    rename(!!paste0(t) := legR_D2)        # rename coefficient variable to termlabel
  
}

# drop coefficients >= -.2 | <= .2
# drop if more than 3 NAs in row, Dim variables

cortable_d1 <- bind_cols(cormatrix_d1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2 <- bind_cols(cormatrix_d2) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

### Figure 5a: Toronto, Dimension 1 ----
tmp1 <- cortable_d1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1$np))-.5
tmp1 <- tmp1 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) >= .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['fig5ator_cor_d1']] <- ggplot(tmp1, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1$fac)), labels=levels(tmp1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1$fac))+.5), expand=FALSE) 

### Figure 5b: Toronto, Dimension 2 ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2 <- cortable_d2 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2$np))+.5
tmp2 <- tmp2 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) >= .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['fig5btor_cor_d2']] <- ggplot(tmp2, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2$fac)), labels=levels(tmp2$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2$fac))+.5), expand=FALSE) 

# Appendix ----

## Figure A1: Toronto Number of Bills ----
tab1 <- table(out$dats[[1]]$dat$bill.session + 1)
tab2 <- table(out$dats[[2]]$dat$bill.session + 1)
ndf <- data.frame(
  n = c(tab1, tab2), 
  session = rep(1:7, 2), 
  Dimension = rep(1:2, each=7)
)

figures[['figA1tor_votes']] <- ggplot(ndf, aes(x=as.factor(session), y=as.factor(Dimension), fill=log(n))) + 
  geom_tile(show.legend = FALSE) + 
  geom_text(aes(label = n), color="white") + 
  #  scale_fill_viridis_c() + 
  theme_classic() + 
  labs(x="Session", y="Dimension") + 
  coord_cartesian(expand=FALSE) +
  ggtitle("Toronto")

## Figure A2: Toronto Betas ----
betas <- tibble(
  beta = c(out$mods[[1]]$means$beta, out$mods[[2]]$means$beta), 
  session = c(out$dats[[1]]$dat$bill.session, out$dats[[2]]$dat$bill.session), 
  dimension = rep(1:2, c(length(out$dats[[1]]$dat$bill.session), 
                         length(out$dats[[2]]$dat$bill.session))) 
)
figures[['figA2tor_betas']] <- ggplot(betas, aes(x=abs(beta), y=..density..*..width..)) + 
  geom_histogram(bins=10) + 
  facet_grid(dimension ~ I(session+1)) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  scale_y_continuous(label = scales::percent) + 
  labs(x="Absolute Value of Beta", y="Percentage")  +
  ggtitle("Toronto")

## Figure C1: Toronto Voting in Majority ----

figures[['figC1tor_unanimity']] <- unan %>% 
  mutate(term = factor(term, labels=uterms)) %>% 
  ggplot(aes(x=m, group=term)) + 
  geom_histogram(aes(y = after_stat(density)*.05), 
                 binwidth=.05, col="white", boundary=.5) + 
  facet_wrap(~term, ncol = 4) + 
  theme_bw() + 
  theme(panel.grid=element_blank()) + 
  labs(x="Percentage Voting in the Majority", 
       y="Percentage of Votes in Term") + 
  scale_y_continuous(labels=percent) + 
  scale_x_continuous(labels=percent) +
  ggtitle("Toronto")

## Figure D2: Toronto Moran's I ----

# ingest ward census percentage data (all years)
tor_wardcensus <- read.dta13("source/tor_ward_data.dta") %>%
  rename(ward = wardnum) %>%
  mutate(cyear = ifelse(electionyear == 1997, 1996,
                        ifelse(electionyear == 2000, 2001,
                               ifelse(electionyear == 2003, 2001,
                                      ifelse(electionyear == 2006, 2006,
                                             ifelse(electionyear == 2010, 2011,
                                                    ifelse(electionyear == 2014, 2016, 
                                                           ifelse(electionyear == 2018, 2016, NA)))))))) %>%
  relocate(term, electionyear, cyear, ward)

# ingest ward shapefiles

tor_wards <- NULL

tor_wards[['E1997_C1996']] <- st_read(paste0("source/ward_shp/tor_wards28.shp")) %>%
  rename(ward = wards28) %>%
  mutate(cyear = 1996) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 1997)

tor_wards[['E2000_C2001']] <- st_read(paste0("source/ward_shp/tor_wards44.shp")) %>%
  rename(ward = wards44) %>%
  mutate(cyear = 2001) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 2000)

tor_wards[['E2006_C2006']] <- st_read(paste0("source/ward_shp/tor_wards44.shp")) %>%
  rename(ward = wards44) %>%
  mutate(cyear = 2006) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 2006)

tor_wards[['E2010_C2011']] <- st_read(paste0("source/ward_shp/tor_wards44.shp")) %>%
  rename(ward = wards44) %>%
  mutate(cyear = 2011) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 2010)

tor_wards[['E2014_C2016']] <- st_read(paste0("source/ward_shp/tor_wards44.shp")) %>%
  rename(ward = wards44) %>%
  mutate(cyear = 2016) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 2014)

tor_wards[['E2018_C2016']] <- st_read(paste0("source/ward_shp/tor_wards25.shp")) %>%
  rename(ward = wards25) %>%
  mutate(cyear = 2016) %>%
  mutate(ward = as.numeric(ward)) %>%
  select(ward, cyear, geometry) %>%
  left_join(tor_wardcensus, by = c("ward", "cyear")) %>%
  select_if(~ !any(is.na(.))) %>%
  filter(electionyear == 2018)

varname_lookup <- rio::import("source/varname_lookup.xlsx")

items <- names(tor_wards)

# set parameters

moran.summary <- NULL
discard <- NULL

for (item in items) {
  
  shp = paste0("tor_wards$", paste0(item))
  prefix = c("v_", "q_", "s_")
  weighttype = "S"
  
  # convert to sp object
  shp.sp <- eval(parse(text = shp)) %>% as_Spatial()
  
  # retrieve desired column names to use
  vars <- shp.sp@data %>% select(starts_with(prefix)) %>% colnames()
  
  # make weights table
  nb <- poly2nb(shp.sp, queen = FALSE)    # queen = FALSE means an edge is shared
  w <- nb2listw(nb, style = weighttype, zero.policy = TRUE) 
  
  # calculate Moran's I for all variables using Monte Carlo method
  moran <- NULL
  for(v in vars) {
    moran[[v]] <- moran.mc(eval(parse(text = paste0("shp.sp@data$", v))), 
                           listw = w, nsim = 499, zero.policy = TRUE, na.action = na.exclude)
  }
  
  # create moran summary table by variable
  moran.summary[[item]] <- data.frame(matrix(unlist(moran), nrow = length(moran), byrow = TRUE), row.names = vars) %>%
    select(c(X1, X3)) %>%
    rename("moran" = "X1") %>%
    rename("moranp" = "X3") %>%
    mutate(variable = rownames(.)) %>%
    mutate("rank" = dense_rank(desc(moran))) %>%
    dplyr::arrange(rank) %>%
    distinct(moran, variable, rank, .keep_all = TRUE) %>%
    filter(moran != "NaN") %>%
    mutate(moran = round(as.numeric(moran), 3)) %>%
    rename(!!paste0(item) := moran) %>%
    filter(as.numeric(moranp) < .05) %>%
    select(-rank, -moranp)
  
  discard[[item]] <- data.frame(matrix(unlist(moran), nrow = length(moran), byrow = TRUE), row.names = vars) %>%
    select(c(X1, X3)) %>%
    rename("moran" = "X1") %>%
    rename("moranp" = "X3") %>%
    mutate(variable = rownames(.)) %>%
    distinct(moran, variable, .keep_all = TRUE) %>%
    mutate(moran = round(as.numeric(moran), 3)) %>%
    rename(!!paste0(item) := moran) %>%
    filter(as.numeric(moranp) >= .05)
  
}

moran.combine <- moran.summary %>%
  reduce(full_join, by = "variable") %>%
  left_join(varname_lookup, by = "variable") %>%
  select(-vartype) %>%
  filter(!is.na(varname)) %>%
  rowwise() %>%
  relocate(varname, variable) %>%
  distinct()

# sparklines of moran coefficients

cols <- c("varname", "variable", gsub("_", "\n", items))
vars <- moran.combine$variable
sparklines <- NULL
for (i in vars) {
  
  varname = varname_lookup %>%
    filter(variable == i) %>%
    select(varname) %>%
    as_vector()
  
  table <- moran.combine 
  names(table) <- cols
  
  table <- table %>% 
    pivot_longer(cols = starts_with("E"), names_to = "ElectionYear", names_prefix = "E", values_to = "I", values_drop_na = TRUE) %>%
    filter(variable == i) %>%
    #mutate(WardScheme = as.numeric(WardScheme)) %>%
    select(varname, ElectionYear, I) %>%
    mutate(max = ifelse(I == max(I), I, NA)) %>%
    mutate(min = ifelse(I == min(I), I, NA))
  
  sparklines[[i]] <- ggplot() +
    geom_hline(yintercept = mean(table$I), colour = "grey", linetype = "dashed", linewidth = .5) +
    geom_line(data = table,  aes(x = ElectionYear, y = I, group = 1)) +
    geom_point(data = table, aes(x = ElectionYear, y = I), size = 1, colour = alpha("black", 1)) +
    geom_point(data = table, aes(x = ElectionYear, y = max), size = 3, colour = alpha("red", 1)) +
    geom_point(data = table, aes(x = ElectionYear, y = min), size = 3, colour = alpha("blue", 1)) +
    geom_text(data = table, aes(x = ElectionYear, y = max, label = max), size = 3, colour = alpha("red", 1), check_overlap = TRUE, vjust = -1) +
    geom_text(data = table, aes(x = ElectionYear, y = min, label = min), size = 3, colour = alpha("blue", 1), check_overlap = TRUE, vjust = 2) +
    scale_y_continuous(expand = expansion(add = .15)) +
    labs(title = varname, ylab = "Moran's I") +
    theme_classic() +
    theme(axis.title.y = element_text(angle = 90),
          axis.line = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank())
  
}

# assemble

figures[['figD2tor_moran']] <- ggarrange(sparklines$v_marmaclmarr + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
          sparklines$v_age2029 + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"),
          sparklines$v_lfoart + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
          sparklines$v_lfoblue + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
          sparklines$s_inhtotl_avg + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
          sparklines$v_edc015ppostunivbach + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
          sparklines$v_vm_visi_tot + rremove("ylab"), 
          sparklines$s_densqkm + rremove("ylab"),
          ncol = 2, nrow = 4)

## Figure E2: Toronto Correlations using different RNG seeds ----

terms <- c("1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")
names(terms) <- c(1,2,3,4,5,6,7)
termlabels <- as_tibble_col(terms, column_name = "termlabel") %>% 
  mutate(session = row_number())

### Load data

#load("source/tor_rollcalls.rda")

data <- read.dta13("source/tor_ward_data.dta") %>%
  mutate(session = term) %>% 
  left_join(termlabels) %>% 
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | term | termlabel | session | wardnum) %>% 
  filter(wardnum != 0)
tor_hist <- read.dta13("source/tor_ward_councilors.dta")

data <- left_join(tor_hist %>% select(wardnum, session, name), data)

varname_lookup <- rio::import("source/varname_lookup.xlsx")

### Process vote matrix
m <- colMeans(X, na.rm=TRUE)
unan <- tibble(
  m = m, 
  term = final_vote$term
)
unan <- unan %>% 
  mutate(m = ifelse(m < .5, 1-m, m)) %>% 
  filter(m < .95)

# Initialize lists
r1 <- r2 <- vector(mode="list", length=sims)

# Simulation loop
for(i in 1:sims){

  print(paste0("Toronto Iteration: ", i))
  print(paste0("Seed: ", seeds[i]))
  print(Sys.time())

  # h2o::h2o.removeAll()
  # h2o::h2o.shutdown(prompt = FALSE)
  # h2o.init()

  ## run legR
  out <- legR(
    X,
    terms=final_vote$term,
    #    terms=tmp_trm,
    legis_data=h_all,
    minprop=.2,
    nRounds = 1,
    othermax=.35,
    minperterm=10,
    k=2,
    seed = seeds[i],
    ndim=2,
    method="glrm",
    max_mem_size="8g",
    est_model = FALSE
  )

  ## set polarity anchors on priors
  priors <- out$priors
  priors[[1]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[1]]$x.mu0 ))
  priors[[2]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[2]]$x.mu0 ))
  priors[[1]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[1]]$x.sigma0 ))
  priors[[2]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[2]]$x.sigma0 ))
  priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "D.Holyday"),1] <- 1
  priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Mihevc"),1] <- -1
  priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "McConnell"),1] <- 1
  priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Ootes"),1] <- -1
  priors[[1]]$x.sigma0[which(out$dats[[1]]$id$name %in% c("D.Holyday", "Mihevc"))] <- .1
  priors[[2]]$x.sigma0[which(out$dats[[2]]$id$name %in% c("Ootes", "McConnell"))] <- .1

  ## estimate model with priors set
  out <- legR(
    X,
    terms=final_vote$term,
    #    terms=tmp_trm,
    legis_data=h_all,
    priors = priors,
    minprop=.2,
    nRounds = 1,
    othermax=.35,
    minperterm=10,
    k=2,
    seed = seeds[i],
    ndim=2,
    method="glrm",
    max_mem_size="8g",
    est_model = TRUE
  )
  x <- gather_data(out)
  x <- x %>% dplyr::filter(!((Dim1 == 0 | is.na(Dim1)) & (Dim2 == 0 | is.na(Dim2))))
  x <- x %>% arrange(session, name)
  flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
  x <- left_join(x, flipx %>% mutate(session = as.numeric(session)))
  ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
  x <- x %>% mutate(Dim1 = -Dim1*flip1,
                    Dim2 = Dim2*flip2,
                    Dim1 = -Dim1) %>%
    select(-contains("flip"))
  xs <- x %>% group_split(session) %>% map(function(x){
    o <- orthogonalize(x)
    o <- o %>% select(-Dim1, -Dim2) %>% rename(Dim1=Dim1_gs, Dim2=Dim2_gs)
    o})
  x <- bind_rows(xs)
  # rio::export(x, paste0("tabsfigs_1019/toronto_placements_", substr(as.character(Sys.time()), 1, 10), ".dta"))

  # Assemble correlation matrices

  data1 <-  x %>%
    left_join(tor_hist %>% select(name, session, wardnum)) %>%
    filter(!is.na(wardnum)) %>%
    right_join(data)


  terms <- c("Pooled", "1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")


  byterm <- split(data1, f = data1$termlabel)
  byterm[["Pooled"]] <- data1  # add whole dataset as "pooled" item


  cormatrix <- NULL
  cormatrix_d1 <- NULL
  cormatrix_d2 <- NULL

  for(t in terms){

    byterm[[t]] <- byterm[[t]] %>%
      select(-c(name, session, wardnum, term, termlabel)) # drop termlabel string variable from byterm to enable correlation

    cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table

    cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
      focus(Dim1) %>%
      filter(!grepl("Dim2", term)) %>%
      rename(!!paste0(t) := Dim1)        # rename coefficient variable to termlabel

    cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
      focus(Dim2) %>%
      filter(!grepl("Dim1", term)) %>%
      rename(!!paste0(t) := Dim2)        # rename coefficient variable to termlabel

  }

  # Build Dim1 correlations by session
  # join on variable names to make table across terms

  cortable_d1 <- bind_cols(cormatrix_d1) %>%
    rename(variable = term...1) %>%           # rename first variable name column
    select(!starts_with("term")) %>%          # drop duplicate variable name columns
    relocate(Pooled, .after = variable)

  r <- sapply(4:8, function(i)mean(sign(cortable_d1[,3]) == sign(cortable_d1[,i]), na.rm=TRUE))

  flips <- (4:8)[which(r < .5)]
  if(length(flips) > 0){
    cortable_d1 <- cortable_d1 %>%
      mutate(across(all_of(names(cortable_d1)[flips]), ~-.x))
  }

  r1[[i]] <- cortable_d1

  # Build Dim2 correlations by session
  # join on variable names to make table across terms

  cortable_d2 <- bind_cols(cormatrix_d2) %>%
    rename(variable = term...1) %>%           # rename first variable name column
    select(!starts_with("term")) %>%          # drop duplicate variable name columns
    relocate(Pooled, .after = variable)

  r <- sapply(4:8, function(i) mean(sign(cortable_d2[,3]) == sign(cortable_d2[,i]), na.rm=TRUE))

  flips <- (4:8)[which(r < .5)]
  if(length(flips) > 0){
    cortable_d2 <- cortable_d2 %>%
      mutate(across(all_of(names(cortable_d2)[flips]), ~-.x))
  }

  r2[[i]] <- cortable_d2

} # end simulation loop


load("temp/tor_baseline.rda")


data1 <-  tor_baseline %>%
  left_join(tor_hist %>% select(name, session, wardnum)) %>%
  filter(!is.na(wardnum)) %>%
  right_join(data)


terms <- c("Pooled", "1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")


byterm <- split(data1, f = data1$termlabel)
byterm[["Pooled"]] <- data1  # add whole dataset as "pooled" item


cormatrix <- NULL
cormatrix_d1 <- NULL
cormatrix_d2 <- NULL

for(t in terms){

  byterm[[t]] <- byterm[[t]] %>%
    select(-c(name, session, wardnum, term, termlabel)) # drop termlabel string variable from byterm to enable correlation

  cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table

  cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1) %>%
    filter(!grepl("Dim2", term)) %>%
    rename(!!paste0(t) := Dim1)        # rename coefficient variable to termlabel

  cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2) %>%
    filter(!grepl("Dim1", term)) %>%
    rename(!!paste0(t) := Dim2)        # rename coefficient variable to termlabel

}

# Build Dim1 correlations by session
# join on variable names to make table across terms

cortable_d1 <- bind_cols(cormatrix_d1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable)

r <- sapply(4:8, function(i)mean(sign(cortable_d1[,3]) == sign(cortable_d1[,i]), na.rm=TRUE))

flips <- (4:8)[which(r < .5)]
if(length(flips) > 0){
  cortable_d1 <- cortable_d1 %>%
    mutate(across(all_of(names(cortable_d1)[flips]), ~-.x))
}

baseline_d1 <- cortable_d1

# Build Dim2 correlations by session
# join on variable names to make table across terms

cortable_d2 <- bind_cols(cormatrix_d2) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable)

r <- sapply(4:8, function(i) mean(sign(cortable_d2[,3]) == sign(cortable_d2[,i]), na.rm=TRUE))

flips <- (4:8)[which(r < .5)]
if(length(flips) > 0){
  cortable_d2 <- cortable_d2 %>%
    mutate(across(all_of(names(cortable_d2)[flips]), ~-.x))
}

baseline_d2 <- cortable_d2

r1_comp <- r2_comp <- list()
k <- 1
for(i in seq_along(r1)){
  r1_comp[[k]] <- baseline_d1 %>% pivot_longer(-variable) %>% rename(baseline = value) %>% left_join(r1[[i]] %>% pivot_longer(-variable))
  r2_comp[[k]] <- baseline_d2 %>% pivot_longer(-variable) %>% rename(baseline = value) %>% left_join(r2[[i]] %>% pivot_longer(-variable))
  k <- k+1
}

r1_comp <- r1_comp %>% bind_rows(.id="sim")
r2_comp <- r2_comp %>% bind_rows(.id="sim")

r1_comp <- r1_comp %>%
  mutate(abs_baseline_cat = cut(abs(baseline),
                                breaks=seq(0,1, by=.1),
                                include.lowest = TRUE,
                                right = TRUE),
         baseline_cat = cut(baseline,
                            breaks=c(-1,seq(-.6,.6, by=.1),1),
                            include.lowest = TRUE,
                            right = TRUE))

r2_comp <- r2_comp %>%
  mutate(abs_baseline_cat = cut(abs(baseline),
                                breaks=seq(0,1, by=.1),
                                include.lowest = TRUE,
                                right = TRUE),
         baseline_cat = cut(baseline,
                            breaks=c(-1,seq(-.4,.4, by=.1),1),
                            include.lowest = TRUE,
                            right = TRUE))

r1_comp <- r1_comp %>%
  mutate(baseline_pt = ifelse(sim == 1, baseline, NA))
r2_comp <- r2_comp %>%
  mutate(baseline_pt = ifelse(sim == 1, baseline, NA))

figures[['figE2tor_r1']] <- ggplot(r1_comp %>% na.omit(),
       aes(x=value, y=baseline_cat)) +
  geom_density_ridges(scale=.9, fill="gray90") +
  geom_point(aes(x=baseline_pt), position=position_jitter(height=.1), alpha=.25) +
  theme_classic() +
  labs(x="Absolute Correlations from Simulation",
       y = "Absolute Correlation of Original")

figures[['figE2tor_r2']] <- ggplot(r2_comp %>% na.omit(),
       aes(x=value, y=baseline_cat)) +
  geom_density_ridges(scale=.9, fill="gray90") +
  geom_point(aes(x=baseline_pt), position=position_jitter(height=.1), alpha=.25) +
  geom_vline(xintercept=0, linetype=3) +
  theme_classic() +
  labs(x="Absolute Correlations from Simulation",
       y = "Absolute Correlation of Original")

# NOMINATE Diagnostic Results: Curated Data ----

load("source/tor_rollcalls.rda")
tor_votes <- list()
for(i in 1:7){
  tmpX <- X[,which(final_vote$term == i)]
  small_col <- apply(tmpX, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  if(any(small_col < .5*max(small_col))){
    tmpX <- tmpX[, -which(small_col < .5*max(small_col, na.rm=TRUE))]
  }
  small_vote <- apply(tmpX, 1, function(x)sum(!is.na(x)))
  ld <- h_all[-which(small_vote < 25), ]
  ld$party <- "none"
  ld <- ld %>% select(name, party) %>% as.data.frame()
  tmpX <- tmpX[-which(small_vote < 25), ]
  sdx <- apply(tmpX, 2, sd, na.rm=TRUE)
  if(any(sdx == 0)){
    tmpX <- tmpX[,-which(sdx == 0)]
  }
  tor_votes[[i]] <- rollcall(tmpX, legis.names=ld$name, legis.data = ld, desc = paste0("Toronto City Council Votes for Session ", i))
}

dw_tor <- dwnominate(tor_votes, minvotes=5, lop=.01)


tor_wnres <- list()
for(i in 1:7){
  tor_wnres[[i]] <- dwnominate:::auto_wnominate(tor_votes[[i]], minvotes=5, lop=.01)
}

for(i in 1:7){
  tor_wnres[[i]]$legislators$session <- i
}

h_all <- as.data.frame(h_all)
rc_all <- rollcall(X, legis.names=h_all$name, legis.data = h_all)

tor_wn_one <- dwnominate:::auto_wnominate(rc_all, minvotes=5, lop=.01)

load("temp/tor_legr_out.rda")

tor_curated_d1 <- tor_legr_out$dats[[1]]$dat$rc
rownames(tor_curated_d1) <- tor_legr_out$dats[[1]]$id$name
colnames(tor_curated_d1) <- paste0("V", 1:ncol(tor_curated_d1))
tor_curated_d1[which(tor_curated_d1 == 0, arr.ind=TRUE)] <- NA
tor_curated_d1[which(tor_curated_d1 == -1, arr.ind=TRUE)] <- 0
tor_curated_d2 <- tor_legr_out$dats[[2]]$dat$rc
rownames(tor_curated_d2) <- tor_legr_out$dats[[2]]$id$name
colnames(tor_curated_d2) <- paste0("V", ncol(tor_curated_d1)+c(1:ncol(tor_curated_d2)))
tor_curated_d2[which(tor_curated_d2 == 0, arr.ind=TRUE)] <- NA
tor_curated_d2[which(tor_curated_d2 == -1, arr.ind=TRUE)] <- 0
tor_curated_all <- full_join(as_tibble(tor_curated_d1, rownames="name"), 
                             as_tibble(tor_curated_d2, rownames="name"))
nms <- tor_curated_all$name
tor_curated_all <- as.matrix(tor_curated_all[,-1])
rownames(tor_curated_all) <- nms
tor_t1 <- tor_legr_out$dats[[1]]$dat$bill.session+1
tor_t2 <- tor_legr_out$dats[[2]]$dat$bill.session+1
tor_tall <- c(tor_t1, tor_t2)

tor_ld1 <- data.frame(name = rownames(tor_curated_d1), party="none")
tor_ld2 <- data.frame(name = rownames(tor_curated_d2), party="none")
tor_lda <- data.frame(name = rownames(tor_curated_all), party="none")


tor_c1 <- tor_c2 <- tor_c_all <- list()
for(i in 1:7){
  tmpX1 <- tor_curated_d1[,which(tor_t1 == i)]
  tmpX2 <- tor_curated_d2[,which(tor_t2 == i)]
  tmpXa <- tor_curated_all[,which(tor_tall == i)]
  small_col1 <- apply(tmpX1, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  small_col2 <- apply(tmpX2, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  small_cola <- apply(tmpXa, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  if(any(small_col1 < .5*max(small_col1))){
    tmpX1 <- tmpX1[, -which(small_col1 < .5*max(small_col1, na.rm=TRUE))]
  }
  if(any(small_col2 < .5*max(small_col2))){
    tmpX2 <- tmpX2[, -which(small_col2 < .5*max(small_col2, na.rm=TRUE))]
  }
  if(any(small_cola < .5*max(small_cola))){
    tmpXa <- tmpXa[, -which(small_cola < .5*max(small_cola, na.rm=TRUE))]
  }
  
  small_vote1 <- apply(tmpX1, 1, function(x)sum(!is.na(x)))
  small_vote2 <- apply(tmpX2, 1, function(x)sum(!is.na(x)))
  small_votea <- apply(tmpXa, 1, function(x)sum(!is.na(x)))
  ld1 <- tor_ld1[-which(small_vote1 < 25), ]
  ld2 <- tor_ld2[-which(small_vote2 < 25), ]
  lda <- tor_lda[-which(small_votea < 25), ]
  ld1 <- ld1 %>% select(name, party) %>% as.data.frame()
  ld2 <- ld2 %>% select(name, party) %>% as.data.frame()
  lda <- lda %>% select(name, party) %>% as.data.frame()
  tmpX1 <- tmpX1[-which(small_vote1 < 25), ]
  tmpX2 <- tmpX2[-which(small_vote2 < 25), ]
  tmpXa <- tmpXa[-which(small_votea < 25), ]
  sdx1 <- apply(tmpX1, 2, sd, na.rm=TRUE)
  sdx2 <- apply(tmpX2, 2, sd, na.rm=TRUE)
  sdxa <- apply(tmpXa, 2, sd, na.rm=TRUE)
  if(any(sdx1 == 0)){
    tmpX1 <- tmpX1[,-which(sdx1 == 0)]
  }
  if(any(sdx2 == 0)){
    tmpX2 <- tmpX2[,-which(sdx2 == 0)]
  }
  if(any(sdxa == 0)){
    tmpXa <- tmpXa[,-which(sdxa == 0)]
  }
  
  tor_c1[[i]] <- rollcall(tmpX1, legis.names=ld1$name, legis.data = ld1, desc = paste0("Toronto City Council Votes for Session ", i))
  tor_c2[[i]] <- rollcall(tmpX2, legis.names=ld2$name, legis.data = ld2, desc = paste0("Toronto City Council Votes for Session ", i))
  tor_c_all[[i]] <- rollcall(tmpXa, legis.names=lda$name, legis.data = lda, desc = paste0("Toronto City Council Votes for Session ", i))
}


dw_tor_curated_d1 <- dwnominate(tor_c1, minvotes=5, lop=.01, dims = 1)
dw_tor_curated_d2 <- dwnominate(tor_c2, minvotes=5, lop=.01, dims = 1)
dw_tor_curated_dall <- dwnominate(tor_c_all, minvotes=5, lop=.01, dims = 2)


save(tor_wnres, tor_wn_one, dw_tor, dw_tor_curated_d1, dw_tor_curated_d2, dw_tor_curated_dall, 
     file="temp/toronto_nominate.rda")


## Toronto Nominate Figures ----

load("temp/toronto_nominate.rda")

tor_dw_latent <- dw_tor$legislators %>% select(session, name, coord1D, coord2D) %>% rename(Dim1_dw=coord1D, Dim2_dw=coord2D)
tor_wn1_latent <- tor_wn_one$legislators %>% select(name, coord1D, coord2D) %>% rename(Dim1_wn1=coord1D, Dim2_wn1=coord2D) 

tor_wn_latent <- purrr::map(tor_wnres, \(x)x$legislators %>% select(c(name, coord1D, coord2D)) %>% rename(Dim1_wn=coord1D, Dim2_wn=coord2D)) %>% 
  bind_rows(.id="session")
rownames(tor_wn_latent) <- NULL

tor_dw_c1_latent <- dw_tor_curated_d1$legislators %>% select(session, name, coord1D) %>% rename(Dim1_dwc1=coord1D)
tor_dw_c2_latent <- dw_tor_curated_d2$legislators %>% select(session, name, coord1D) %>% rename(Dim2_dwc1=coord1D)
tor_dw_call_latent <- dw_tor_curated_dall$legislators %>% select(session, name, coord1D, coord2D) %>% rename(Dim1_dwca=coord1D, Dim2_dwca=coord2D)

tor_dw_c1_latent <- full_join(tor_dw_c1_latent, tor_dw_c2_latent) %>% arrange(session, name)
tor_dw_call_latent <- tor_dw_call_latent %>% arrange(session, name)

tmp1 <- tor_dw_c1_latent %>% 
  setNames(c('session', 'name', 'Dim1', 'Dim2')) %>% 
  dplyr::group_split(session) %>% 
  purrr::map(function(x){
    o <- legR::orthogonalize(x)
    o <- o %>% dplyr::select(-Dim1, -Dim2) %>% dplyr::rename(Dim1_dwc1=Dim1_gs, Dim2_dwc1=Dim2_gs)
    o})
tor_dw_c1_latent <- bind_rows(tmp1)

tmp2 <- tor_dw_call_latent %>% 
  setNames(c('session', 'name', 'Dim1', 'Dim2')) %>% 
  dplyr::group_split(session) %>% 
  purrr::map(function(x){
    o <- legR::orthogonalize(x)
    o <- o %>% dplyr::select(-Dim1, -Dim2) %>% dplyr::rename(Dim1_dwca=Dim1_gs, Dim2_dwca=Dim2_gs)
    o})
tor_dw_call_latent <- bind_rows(tmp2)

tor_latent <- full_join(tor_dw_latent, tor_wn_latent %>% mutate(session = as.numeric(session)))
tor_latent <- left_join(tor_latent, tor_wn1_latent)
tor_latent <- left_join(tor_latent, tor_dw_c1_latent)
tor_latent <- left_join(tor_latent, tor_dw_call_latent)

tor_latent <- tor_latent %>% mutate(across(c(Dim1_wn, Dim2_wn, Dim2_wn1, Dim2_dwca), ~-.x))
save(tor_latent, file="temp/tor_latent.rda")



load("source/tor_rollcalls.rda")
trm <- final_vote$term
pb <- progress_bar$new(total = ncol(X))
res <- NULL
for(i in 1:ncol(X)){
  pb$tick()
  tmp.trm <- trm[i]
  dat <- tibble(vote = X[,i], name = h_all$name)
  lat <- tor_latent %>% filter(session == tmp.trm)
  dat <- inner_join(dat, lat, by="name") %>% na.omit()
  if(sum(dat$vote == 1, na.rm=TRUE) > 0 & sum(dat$vote == 0, na.rm=TRUE) > 0){
    m1a <- suppressWarnings(glm(vote ~ Dim1_dw, data=dat, family=binomial))
    m2a <- suppressWarnings(glm(vote ~ Dim2_dw, data=dat, family=binomial))
    m12a <- suppressWarnings(glm(vote ~ Dim1_dw + Dim2_dw, data=dat, family=binomial))
    m1b <- suppressWarnings(glm(vote ~ Dim1_wn, data=dat, family=binomial))
    m2b <- suppressWarnings(glm(vote ~ Dim2_wn, data=dat, family=binomial))
    m12b <- suppressWarnings(glm(vote ~ Dim1_wn + Dim2_wn, data=dat, family=binomial))
    m1c <- suppressWarnings(glm(vote ~ Dim1_wn1, data=dat, family=binomial))
    m2c <- suppressWarnings(glm(vote ~ Dim2_wn1, data=dat, family=binomial))
    m12c <- suppressWarnings(glm(vote ~ Dim1_wn1 + Dim2_wn1, data=dat, family=binomial))
    m1d <- suppressWarnings(glm(vote ~ Dim1_dwc1, data=dat, family=binomial))
    m2d <- suppressWarnings(glm(vote ~ Dim2_dwc1, data=dat, family=binomial))
    m12d <- suppressWarnings(glm(vote ~ Dim1_dwc1 + Dim2_dwc1, data=dat, family=binomial))
    m1e <- suppressWarnings(glm(vote ~ Dim1_dwca, data=dat, family=binomial))
    m2e <- suppressWarnings(glm(vote ~ Dim2_dwca, data=dat, family=binomial))
    m12e <- suppressWarnings(glm(vote ~ Dim1_dwca + Dim2_dwca, data=dat, family=binomial))
    ce1a <- sum(m1a$y != round(m1a$fitted))
    ce2a <- sum(m2a$y != round(m2a$fitted))
    ce12a <- sum(m12a$y != round(m12a$fitted))
    ce1b <- sum(m1b$y != round(m1b$fitted))
    ce2b <- sum(m2b$y != round(m2b$fitted))
    ce12b <- sum(m12b$y != round(m12b$fitted))
    ce1c <- sum(m1c$y != round(m1c$fitted))
    ce2c <- sum(m2c$y != round(m2c$fitted))
    ce12c <- sum(m12c$y != round(m12c$fitted))
    ce1d <- sum(m1d$y != round(m1d$fitted))
    ce2d <- sum(m2d$y != round(m2d$fitted))
    ce12d <- sum(m12d$y != round(m12d$fitted))
    ce1e <- sum(m1e$y != round(m1e$fitted))
    ce2e <- sum(m2e$y != round(m2e$fitted))
    ce12e <- sum(m12e$y != round(m12e$fitted))
    minvotes <- min(table(na.omit(dat$vote)))
    nvotes <- sum(!is.na(dat$vote))
    res <- rbind(res, data.frame(session=tmp.trm, 
                                 minvote = minvotes, 
                                 ce1_dw = ce1a, 
                                 ce2_dw = ce2a, 
                                 ce12_dw = ce12a,
                                 ce1_wn = ce1b, 
                                 ce2_wn = ce2b, 
                                 ce12_wn = ce12b,
                                 ce1_wn1 = ce1c, 
                                 ce2_wn1 = ce2c, 
                                 ce12_wn1 = ce12c,
                                 ce1_dwc1 = ce1d, 
                                 ce2_dwc1 = ce2d, 
                                 ce12_dwc1 = ce12d, 
                                 ce1_dwca = ce1e, 
                                 ce2_dwca = ce2e, 
                                 ce12_dwca = ce12e, 
                                 n=nvotes))
  }
}
tor_dw_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_dw = sum(minvote - ce1_dw)/sum(minvote), 
            apre2_dw = sum(minvote - ce2_dw)/sum(minvote), 
            apre12_dw = sum(minvote - ce12_dw)/sum(minvote)) %>% 
  mutate(apre_diff_dw = apre12_dw-apre1_dw)

tor_dw_apre <- res %>% 
  summarise(apre1_dw = sum(minvote - ce1_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_dw = sum(minvote - ce2_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_dw = sum(minvote - ce12_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_dw = apre12_dw-apre1_dw)

tor_wn_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_wn = sum(minvote - ce1_wn)/sum(minvote), 
            apre2_wn = sum(minvote - ce2_wn)/sum(minvote), 
            apre12_wn = sum(minvote - ce12_wn)/sum(minvote)) %>% 
  mutate(apre_diff_wn = apre12_wn-apre1_wn)

tor_wn_apre <- res %>% 
  summarise(apre1_wn = sum(minvote - ce1_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_wn = sum(minvote - ce2_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_wn = sum(minvote - ce12_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_wn = apre12_wn-apre1_wn)

tor_wn1_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_wn1 = sum(minvote - ce1_wn1)/sum(minvote), 
            apre2_wn1 = sum(minvote - ce2_wn1)/sum(minvote), 
            apre12_wn1 = sum(minvote - ce12_wn1)/sum(minvote)) %>% 
  mutate(apre_diff_wn1 = apre12_wn1-apre1_wn1)

tor_wn1_apre <- res %>% 
  summarise(apre1_wn1 = sum(minvote - ce1_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_wn1 = sum(minvote - ce2_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_wn1 = sum(minvote - ce12_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_wn1 = apre12_wn1-apre1_wn1)


tor_nom_apre <- bind_cols(bind_rows(tor_dw_apre %>% mutate(session = 0) %>% select(session, everything()), tor_dw_apre_by_term),
                          bind_rows(tor_wn_apre %>% mutate(session = 0) %>% select(session, everything()), tor_wn_apre_by_term) %>% select(-session),
                          bind_rows(tor_wn1_apre %>% mutate(session = 0) %>% select(session, everything()), tor_wn1_apre_by_term) %>% select(-session)) %>% 
  mutate(city = "Toronto", 
         session = factor(session, 
                          levels=0:7, 
                          labels= c("Overall", "1998-2000 Lastman", "2000-2003 Lastman", 
                                    "2003-2006 Miller", "2006-2010 Miller", 
                                    "2010-2014 Ford", "2014-2018 Tory", "2018-2022 Tory"))) %>% 
  select(-starts_with("apre2"), -starts_with("apre_d")) %>% 
  mutate(apre12_dw = apre12_dw-apre1_dw, 
         apre12_wn = apre12_wn-apre1_wn, 
         apre12_wn1 = apre12_wn1-apre1_wn1) %>% 
  pivot_longer(starts_with("apre1"), names_pattern = "(.*)_(.*)", names_to=c("dimension", "model"), values_to="apre") %>% 
  mutate(dimension = factor(dimension, 
                            levels=c("apre12", "apre1"), 
                            labels=c("Dimension 1+2", "Dimension 1")), 
         model = factor(model, levels=c("dw", "wn", "wn1"), 
                        labels=c("DW-NOMINATE", "W-NOMINATE (term-by-term)", "W-NOMINATE (one-shot)"))) %>% 
  mutate(session_num = as.numeric(session)-1, 
         session_num = ifelse(session_num  ==0, -.5, session_num))


save(tor_nom_apre, file="temp/tor_nom_apre.rda")


tor_dwc1_apre <- res %>% 
  summarise(apre1_dwc1 = sum(minvote - ce1_dwc1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_dwc1 = sum(minvote - ce2_dwc1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_dwc1 = sum(minvote - ce12_dwc1, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_dwc1 = apre12_dwc1-apre1_dwc1)


tor_dwca_apre <- res %>% 
  summarise(apre1_dwca = sum(minvote - ce1_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_dwca = sum(minvote - ce2_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_dwca = sum(minvote - ce12_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_dwca = apre12_dwca-apre1_dwca)




cat("DW-NOMINATE RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_dw))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_dw))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_dw))/sum(res$n))*100)

cat("W-NOMINATE (TERM-BY-TERM) RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_wn))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_wn))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_wn))/sum(res$n))*100)

cat("W-NOMINATE (ONE-SHOT) RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_wn1))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_wn1))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_wn1))/sum(res$n))*100)



cat("Percentage of PRE\n")
tor_nom_apre %>% 
  filter(dimension %in% c("Dimension 1", "Dimension 1+2")) %>% 
  select(session, model, dimension, apre) %>% 
  group_by(session, model) %>% 
  reframe(pct1 = apre[1]/sum(apre)*100, 
          pct2 = apre[2]/sum(apre)*100) %>%
  as.data.frame()

figures[['tor_nom_apre']] <- tor_nom_apre %>% 
  ggplot(aes(y=apre, fill=dimension)) + 
  geom_bar(aes(x=session_num), stat="identity")  + 
  theme_bw() + 
  labs(x="", y="APRE", fill="") + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.25), 
        text = element_text(size = 8),
        legend.position="top", 
        legend.background = element_blank(), 
        legend.key = element_rect(fill = "transparent", colour = "transparent"), 
        panel.grid=element_blank()) + 
  scale_fill_manual(values = c("grey70", "grey30")) +
  scale_y_continuous(labels=label_percent(accuracy=1)) + 
  scale_x_continuous(breaks = c(-.5, 1:7), labels=levels(tor_nom_apre$session)) + 
  guides(fill=guide_legend(nrow=1,byrow=TRUE)) + 
  facet_wrap(~model, ncol=1)

## correlation table figures
terms <- c("Pooled","1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")

data <- read.dta13("source/tor_ward_data.dta") %>%
  mutate(session = term, 
         termlabel = terms[-1][session]) %>% 
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | term | termlabel | session | wardnum) %>% 
  filter(wardnum != 0)
tor_hist <- read.dta13("source/tor_ward_councilors.dta")

data <- left_join(tor_hist %>% select(wardnum, session, name), data)

data_dw <- data %>% 
  left_join(tor_latent %>% select(name, session, Dim1_dw, Dim2_dw)) %>% 
  select(-session, -name, -wardnum, -term)

data_wn <- data %>% 
  left_join(tor_latent %>% select(name, session, Dim1_wn, Dim2_wn)) %>% 
  select(-session, -name, -wardnum, -term)

data_wn1 <- data %>% 
  left_join(tor_latent %>% select(name, session, Dim1_wn1, Dim2_wn1)) %>% 
  select(-session, -name, -wardnum, -term)

data_dwc1 <- data %>% 
  left_join(tor_latent %>% select(name, session, Dim1_dwc1, Dim2_dwc1)) %>% 
  select(-session, -name, -wardnum, -term)

data_dwca <- data %>% 
  left_join(tor_latent %>% select(name, session, Dim1_dwca, Dim2_dwca)) %>% 
  select(-session, -name, -wardnum, -term)

rownames(data_dw) <- rownames(data_wn) <- rownames(data_wn1) <- NULL

varname_lookup <- rio::import("source/varname_lookup.xlsx")
varname_lookup <- bind_rows(varname_lookup, tibble(variable = "v_lfosci", varname = "% Science/Tech Jobs", vartype="status"))

byterm_dw <- split(data_dw, f = data_dw$termlabel) 
byterm_wn <- split(data_wn, f = data_wn$termlabel) 
byterm_wn1 <- split(data_wn1, f = data_wn1$termlabel) 
byterm_dwc1 <- split(data_dwc1, f = data_dwc1$termlabel) 
byterm_dwca <- split(data_dwca, f = data_dwca$termlabel) 

byterm_dw[["Pooled"]] <- data_dw  # add whole dataset as "pooled" item  
byterm_wn[["Pooled"]] <- data_wn  # add whole dataset as "pooled" item  
byterm_wn1[["Pooled"]] <- data_wn1  # add whole dataset as "pooled" item  
byterm_dwc1[["Pooled"]] <- data_dwc1  # add whole dataset as "pooled" item  
byterm_dwca[["Pooled"]] <- data_dwca  # add whole dataset as "pooled" item  


terms <- c("Pooled","1998-2001","2001-2003","2004-2006","2006-2010","2010-2014","2014-2018", "2018-2022")

cormatrix_dw <- NULL
cormatrix_d1_dw <- NULL
cormatrix_d2_dw <- NULL
cormatrix_wn <- NULL
cormatrix_d1_wn <- NULL
cormatrix_d2_wn <- NULL
cormatrix_wn1 <- NULL
cormatrix_d1_wn1 <- NULL
cormatrix_d2_wn1 <- NULL
cormatrix_dwc1 <- NULL
cormatrix_d1_dwc1 <- NULL
cormatrix_d2_dwc1 <- NULL
cormatrix_dwca <- NULL
cormatrix_d1_dwca <- NULL
cormatrix_d2_dwca <- NULL

for(t in terms){
  
  byterm_dw[[t]] <- byterm_dw[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_dw[[t]] <- correlate(byterm_dw[[t]], quiet = TRUE) # correlate entire table
  byterm_wn[[t]] <- byterm_wn[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_wn[[t]] <- correlate(byterm_wn[[t]], quiet = TRUE) # correlate entire table
  byterm_wn1[[t]] <- byterm_wn1[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_wn1[[t]] <- correlate(byterm_wn1[[t]], quiet = TRUE) # correlate entire table
  byterm_dwc1[[t]] <- byterm_dwc1[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_dwc1[[t]] <- correlate(byterm_dwc1[[t]], quiet = TRUE) # correlate entire table
  byterm_dwca[[t]] <- byterm_dwca[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_dwca[[t]] <- correlate(byterm_dwca[[t]], quiet = TRUE) # correlate entire table
  cormatrix_d1_dw[[t]] <- cormatrix_dw[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_dw) %>%
    rename(!!paste0(t) := Dim1_dw)        # rename coefficient variable to termlabela
  cormatrix_d2_dw[[t]] <- cormatrix_dw[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_dw) %>%
    rename(!!paste0(t) := Dim2_dw)        # rename coefficient variable to termlabel
  cormatrix_d1_wn[[t]] <- cormatrix_wn[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_wn) %>%
    rename(!!paste0(t) := Dim1_wn)        # rename coefficient variable to termlabela
  cormatrix_d2_wn[[t]] <- cormatrix_wn[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_wn) %>%
    rename(!!paste0(t) := Dim2_wn)        # rename coefficient variable to termlabel
  cormatrix_d1_wn1[[t]] <- cormatrix_wn1[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_wn1) %>%
    rename(!!paste0(t) := Dim1_wn1)        # rename coefficient variable to termlabela
  cormatrix_d2_wn1[[t]] <- cormatrix_wn1[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_wn1) %>%
    rename(!!paste0(t) := Dim2_wn1)        # rename coefficient variable to termlabel
  cormatrix_d1_dwc1[[t]] <- cormatrix_dwc1[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_dwc1) %>%
    rename(!!paste0(t) := Dim1_dwc1)        # rename coefficient variable to termlabela
  cormatrix_d2_dwc1[[t]] <- cormatrix_dwc1[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_dwc1) %>%
    rename(!!paste0(t) := Dim2_dwc1)        # rename coefficient variable to termlabel
  cormatrix_d1_dwca[[t]] <- cormatrix_dwca[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_dwca) %>%
    rename(!!paste0(t) := Dim1_dwca)        # rename coefficient variable to termlabela
  cormatrix_d2_dwca[[t]] <- cormatrix_dwca[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_dwca) %>%
    rename(!!paste0(t) := Dim2_dwca)        # rename coefficient variable to termlabel
}

# drop coefficients >= -.2 | <= .2
# drop if more than 3 NAs in row, Dim variables

cortable_d1_dw <- bind_cols(cormatrix_d1_dw) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_dw <- bind_cols(cormatrix_d2_dw) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d1_wn <- bind_cols(cormatrix_d1_wn) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_wn <- bind_cols(cormatrix_d2_wn) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d1_wn1 <- bind_cols(cormatrix_d1_wn1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_wn1 <- bind_cols(cormatrix_d2_wn1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)


cortable_d1_dwc1 <- bind_cols(cormatrix_d1_dwc1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "Dim")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable) %>% 
  filter(!is.na(vartype))

cortable_d2_dwc1 <- bind_cols(cormatrix_d2_dwc1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "Dim")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)%>% 
  filter(!is.na(vartype))


cortable_d1_dwca <- bind_cols(cormatrix_d1_dwca) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "Dim")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable) %>% 
  filter(!is.na(vartype))

cortable_d2_dwca <- bind_cols(cormatrix_d2_dwca) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "Dim")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable) %>% 
  filter(!is.na(vartype))


### Toronto, D1 DW-NOMINATE ----
tmp1_dw <- cortable_d1_dw %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_dw$np))-.5
tmp1_dw <- tmp1_dw %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_dw %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['tor_cor_d1_dw']] <- ggplot(tmp1_dw, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_dw$fac)), labels=levels(tmp1_dw$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_dw$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_dw$fac))+.5), expand=FALSE) 

### Toronto, D2 DW-NOMINATE ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_dw <- cortable_d2_dw %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_dw$np))+.5
tmp2_dw <- tmp2_dw %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_dw %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_dw$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['tor_cor_d2_dw']] <- ggplot(tmp2_dw, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_dw$fac)), labels=levels(tmp2_dw$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_dw$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2_dw$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2_dw$fac))+.5), expand=FALSE) 

### Toronto, D1 W-NOMINATE (term by term) ----
tmp1_wn <- cortable_d1_wn %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_wn$np))-.5
tmp1_wn <- tmp1_wn %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_wn %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['tor_cor_d1_wn']] <- ggplot(tmp1_wn, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_wn$fac)), labels=levels(tmp1_wn$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_wn$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_wn$fac))+.5), expand=FALSE) 

### Toronto, D2 W-NOMINATE (term by term) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_wn <- cortable_d2_wn %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_wn$np))+.5
tmp2_wn <- tmp2_wn %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_wn %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_wn$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['tor_cor_d2_wn']] <- ggplot(tmp2_wn, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_wn$fac)), labels=levels(tmp2_wn$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_wn$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2_wn$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2_wn$fac))+.5), expand=FALSE) 

### Toronto, D1 W-NOMINATE (one-shot) ----
tmp1_wn1 <- cortable_d1_wn1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_wn1$np))-.5
tmp1_wn1 <- tmp1_wn1 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_wn1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['tor_cor_d1_wn1']] <- ggplot(tmp1_wn1, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_wn1$fac)), labels=levels(tmp1_wn1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_wn1$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_wn1$fac))+.5), expand=FALSE) 

### Toronto, D2 W-NOMINATE (one-shot) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_wn1 <- cortable_d2_wn1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_wn1$np))+.5
tmp2_wn1 <- tmp2_wn1 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_wn1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_wn1$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['tor_cor_d2_wn1']] <- ggplot(tmp2_wn1, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_wn1$fac)), labels=levels(tmp2_wn1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_wn1$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2_wn1$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2_wn1$fac))+.5), expand=FALSE) 


### Toronto, D1 DW-NOMINATE (Curated Variables, two 1D Models) ----


tmp1_dwc1 <- cortable_d1_dwc1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_dwc1$np))-.5
tmp1_dwc1 <- tmp1_dwc1 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_dwc1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['tor_cor_d1_dwc1']] <- ggplot(tmp1_dwc1, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_dwc1$fac)), labels=levels(tmp1_dwc1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_dwc1$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_dwc1$fac))+.5), expand=FALSE) 

### Toronto, D2 DW-NOMINATE (Curated Variables, two 1D Models) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_dwc1 <- cortable_d2_dwc1 %>% 
  mutate(across(-c(1,2), ~-.x)) %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_dwc1$np))+.5
tmp2_dwc1 <- tmp2_dwc1 %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_dwc1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_dwc1$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['tor_cor_d2_dwc1']] <- ggplot(tmp2_dwc1, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_dwc1$fac)), labels=levels(tmp2_dwc1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_dwc1$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2_dwc1$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2_dwc1$fac))+.5), expand=FALSE) 


### Toronto, D1 DW-NOMINATE (Curated Variables, one 2D Model) ----
tmp1_dwca <- cortable_d1_dwca %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_dwca$np))-.5
tmp1_dwca <- tmp1_dwca %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_dwca %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['tor_cor_d1_dwca']] <- ggplot(tmp1_dwca, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_dwca$fac)), labels=levels(tmp1_dwca$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_dwca$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_dwca$fac))+.5), expand=FALSE) 

### Toronto, D2 DW-NOMINATE (Curated Variables, one 2D Model) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_dwca <- cortable_d2_dwca %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:10, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:10)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_dwca$np))+.5
tmp2_dwca <- tmp2_dwca %>% 
  pivot_longer(3:10, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_dwca %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_dwca$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['tor_cor_d2_dwca']] <- ggplot(tmp2_dwca, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_dwca$fac)), labels=levels(tmp2_dwca$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_dwca$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:8, labels=levels(tmp2_dwca$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 8.5), ylim=c(.5, max(as.numeric(tmp2_dwca$fac))+.5), expand=FALSE) 


tor_figures <- figures
save(tor_figures, tor_apre, seed, file="temp/toronto_figures.rda")
save.image("temp/toronto_complete.rda")

closeAllConnections() # Close connection to log file

